{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a33c5c7a-6d2f-4f38-b72a-ff5f07896184",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "!pip install llama-index llama-index-llms-huggingface ipywidgets\n",
    "!pip install transformers -U"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "fd3b2a78-5782-4f76-8d09-52b6b07a96b8",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2024-02-21T05:49:50.997974Z",
     "iopub.status.busy": "2024-02-21T05:49:50.997681Z",
     "iopub.status.idle": "2024-02-21T05:49:54.378226Z",
     "shell.execute_reply": "2024-02-21T05:49:54.377769Z",
     "shell.execute_reply.started": "2024-02-21T05:49:50.997954Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-02-21 13:49:53,743 - modelscope - INFO - PyTorch version 2.1.2+cu121 Found.\n",
      "2024-02-21 13:49:53,745 - modelscope - INFO - TensorFlow version 2.14.0 Found.\n",
      "2024-02-21 13:49:53,746 - modelscope - INFO - Loading ast index from /mnt/workspace/.cache/modelscope/ast_indexer\n",
      "2024-02-21 13:49:53,746 - modelscope - INFO - No valid ast index found from /mnt/workspace/.cache/modelscope/ast_indexer, generating ast index from prebuilt!\n",
      "2024-02-21 13:49:53,803 - modelscope - INFO - Loading done! Current index file version is 1.12.0, with md5 509123dba36c5e70a95f6780df348471 and a total number of 964 components indexed\n"
     ]
    }
   ],
   "source": [
    "import logging\n",
    "import sys\n",
    "\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
    "\n",
    "\n",
    "from IPython.display import Markdown, display\n",
    "import torch\n",
    "from llama_index.llms.huggingface import HuggingFaceLLM\n",
    "from llama_index.core.prompts import PromptTemplate\n",
    "from modelscope import snapshot_download\n",
    "from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding\n",
    "from abc import ABC\n",
    "from typing import Any, List, Optional, Dict, cast\n",
    "from llama_index.core import (\n",
    "    VectorStoreIndex,\n",
    "    ServiceContext,\n",
    "    set_global_service_context,\n",
    "    SimpleDirectoryReader,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c8375e4c-21c3-433c-a7b1-945007a73ac2",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2024-02-21T05:49:57.097256Z",
     "iopub.status.busy": "2024-02-21T05:49:57.096804Z",
     "iopub.status.idle": "2024-02-21T05:50:38.941821Z",
     "shell.execute_reply": "2024-02-21T05:50:38.941368Z",
     "shell.execute_reply.started": "2024-02-21T05:49:57.097233Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading: 100%|██████████| 662/662 [00:00<00:00, 6.94MB/s]\n",
      "Downloading: 100%|██████████| 51.0/51.0 [00:00<00:00, 586kB/s]\n",
      "Downloading: 100%|██████████| 178/178 [00:00<00:00, 2.13MB/s]\n",
      "Downloading: 100%|██████████| 1.59M/1.59M [00:00<00:00, 27.9MB/s]\n",
      "Downloading: 100%|█████████▉| 3.72G/3.72G [00:08<00:00, 449MB/s]\n",
      "Downloading: 100%|█████████▉| 3.64G/3.64G [00:11<00:00, 336MB/s]\n",
      "Downloading: 100%|██████████| 38.7k/38.7k [00:00<00:00, 40.0MB/s]\n",
      "Downloading: 100%|██████████| 4.13k/4.13k [00:00<00:00, 5.90MB/s]\n",
      "Downloading: 100%|██████████| 6.70M/6.70M [00:00<00:00, 121MB/s]\n",
      "Downloading: 100%|██████████| 1.13k/1.13k [00:00<00:00, 12.4MB/s]\n",
      "Downloading: 100%|██████████| 2.65M/2.65M [00:00<00:00, 91.6MB/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).\n",
      "We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "875c92489c8047c7881342f422f47c79",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "# Model names \n",
    "qwen2_4B_CHAT = \"qwen/Qwen1.5-4B-Chat\"\n",
    "\n",
    "selected_model = snapshot_download(qwen2_4B_CHAT)\n",
    "\n",
    "SYSTEM_PROMPT = \"\"\"You are a helpful AI assistant.\n",
    "\"\"\"\n",
    "\n",
    "query_wrapper_prompt = PromptTemplate(\n",
    "    \"[INST]<<SYS>>\\n\" + SYSTEM_PROMPT + \"<</SYS>>\\n\\n{query_str}[/INST] \"\n",
    ")\n",
    "\n",
    "llm = HuggingFaceLLM(\n",
    "    context_window=4096,\n",
    "    max_new_tokens=2048,\n",
    "    generate_kwargs={\"temperature\": 0.0, \"do_sample\": False},\n",
    "    query_wrapper_prompt=query_wrapper_prompt,\n",
    "    tokenizer_name=selected_model,\n",
    "    model_name=selected_model,\n",
    "    device_map=\"auto\",\n",
    "    # change these settings below depending on your GPU\n",
    "    model_kwargs={\"torch_dtype\": torch.float16},\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "38d1acab-e916-459b-9a11-e39a63751d47",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2024-02-21T05:51:00.938021Z",
     "iopub.status.busy": "2024-02-21T05:51:00.937708Z",
     "iopub.status.idle": "2024-02-21T05:51:01.687136Z",
     "shell.execute_reply": "2024-02-21T05:51:01.686435Z",
     "shell.execute_reply.started": "2024-02-21T05:51:00.937998Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2024-02-21 13:51:01--  https://modelscope.oss-cn-beijing.aliyuncs.com/resource/rag/xianjiaoda.md\n",
      "正在解析主机 modelscope.oss-cn-beijing.aliyuncs.com (modelscope.oss-cn-beijing.aliyuncs.com)... 8.131.208.119\n",
      "正在连接 modelscope.oss-cn-beijing.aliyuncs.com (modelscope.oss-cn-beijing.aliyuncs.com)|8.131.208.119|:443... 已连接。\n",
      "已发出 HTTP 请求，正在等待回应... 200 OK\n",
      "长度： 13228 (13K) [text/markdown]\n",
      "正在保存至: ‘data/xianjiaoda/xianjiaoda.md’\n",
      "\n",
      "data/xianjiaoda/xia 100%[===================>]  12.92K  --.-KB/s    用时 0s      \n",
      "\n",
      "2024-02-21 13:51:01 (31.7 MB/s) - 已保存 ‘data/xianjiaoda/xianjiaoda.md’ [13228/13228])\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!mkdir -p 'data/xianjiaoda/'\n",
    "!wget 'https://modelscope.oss-cn-beijing.aliyuncs.com/resource/rag/xianjiaoda.md' -O 'data/xianjiaoda/xianjiaoda.md'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "75ffc74f-a732-4748-8cb8-481cd8a39f81",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# load documents\n",
    "documents = SimpleDirectoryReader(\"/mnt/workspace/data/xianjiaoda/\").load_data()\n",
    "documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "5689eeaa-8d2c-4df5-9165-abde5d1b3702",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2024-02-21T05:51:07.044053Z",
     "iopub.status.busy": "2024-02-21T05:51:07.043752Z",
     "iopub.status.idle": "2024-02-21T05:51:07.051731Z",
     "shell.execute_reply": "2024-02-21T05:51:07.051278Z",
     "shell.execute_reply.started": "2024-02-21T05:51:07.044036Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "embedding_model = \"damo/nlp_gte_sentence-embedding_chinese-base\"\n",
    "class ModelScopeEmbeddings4LlamaIndex(BaseEmbedding, ABC):\n",
    "    embed: Any = None\n",
    "    model_id: str = \"damo/nlp_gte_sentence-embedding_chinese-base\"\n",
    "\n",
    "    def __init__(\n",
    "            self,\n",
    "            model_id: str,\n",
    "            **kwargs: Any,\n",
    "    ) -> None:\n",
    "        super().__init__(**kwargs)\n",
    "        try:\n",
    "            from modelscope.models import Model\n",
    "            from modelscope.pipelines import pipeline\n",
    "            from modelscope.utils.constant import Tasks\n",
    "            # 使用modelscope的embedding模型（包含下载）\n",
    "            self.embed = pipeline(Tasks.sentence_embedding, model=self.model_id)\n",
    "\n",
    "        except ImportError as e:\n",
    "            raise ValueError(\n",
    "                \"Could not import some python packages.\" \"Please install it with `pip install modelscope`.\"\n",
    "            ) from e\n",
    "\n",
    "    def _get_query_embedding(self, query: str) -> List[float]:\n",
    "        text = query.replace(\"\\n\", \" \")\n",
    "        inputs = {\"source_sentence\": [text]}\n",
    "        return self.embed(input=inputs)['text_embedding'][0].tolist()\n",
    "\n",
    "    def _get_text_embedding(self, text: str) -> List[float]:\n",
    "        text = text.replace(\"\\n\", \" \")\n",
    "        inputs = {\"source_sentence\": [text]}\n",
    "        return self.embed(input=inputs)['text_embedding'][0].tolist()\n",
    "\n",
    "    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:\n",
    "        texts = list(map(lambda x: x.replace(\"\\n\", \" \"), texts))\n",
    "        inputs = {\"source_sentence\": texts}\n",
    "        return self.embed(input=inputs)['text_embedding'].tolist()\n",
    "\n",
    "    async def _aget_query_embedding(self, query: str) -> List[float]:\n",
    "        return self._get_query_embedding(query)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "8590cf73-bb5b-498c-993d-d24f15aad77e",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2024-02-21T05:51:09.906919Z",
     "iopub.status.busy": "2024-02-21T05:51:09.906610Z",
     "iopub.status.idle": "2024-02-21T05:51:17.813191Z",
     "shell.execute_reply": "2024-02-21T05:51:17.812713Z",
     "shell.execute_reply.started": "2024-02-21T05:51:09.906901Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:datasets:PyTorch version 2.1.2+cu121 available.\n",
      "PyTorch version 2.1.2+cu121 available.\n",
      "INFO:datasets:TensorFlow version 2.14.0 available.\n",
      "TensorFlow version 2.14.0 available.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-02-21 13:51:10,907 - modelscope - WARNING - Model revision not specified, use revision: v1.1.0\n",
      "Downloading: 100%|██████████| 917/917 [00:00<00:00, 6.18MB/s]\n",
      "Downloading: 100%|██████████| 2.29k/2.29k [00:00<00:00, 23.5MB/s]\n",
      "Downloading: 100%|██████████| 60.7k/60.7k [00:00<00:00, 26.3MB/s]\n",
      "Downloading: 100%|██████████| 195M/195M [00:00<00:00, 383MB/s] \n",
      "Downloading: 100%|██████████| 11.4k/11.4k [00:00<00:00, 40.4MB/s]\n",
      "Downloading: 100%|██████████| 125/125 [00:00<00:00, 684kB/s]\n",
      "Downloading: 100%|██████████| 429k/429k [00:00<00:00, 20.8MB/s]\n",
      "Downloading: 100%|██████████| 366/366 [00:00<00:00, 4.25MB/s]\n",
      "2024-02-21 13:51:15,095 - modelscope - INFO - initiate model from /mnt/workspace/.cache/modelscope/damo/nlp_gte_sentence-embedding_chinese-base\n",
      "2024-02-21 13:51:15,096 - modelscope - INFO - initiate model from location /mnt/workspace/.cache/modelscope/damo/nlp_gte_sentence-embedding_chinese-base.\n",
      "2024-02-21 13:51:15,096 - modelscope - INFO - initialize model from /mnt/workspace/.cache/modelscope/damo/nlp_gte_sentence-embedding_chinese-base\n",
      "/opt/conda/lib/python3.10/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly.  To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
      "  return self.fget.__get__(instance, owner)()\n",
      "2024-02-21 13:51:15,741 - modelscope - WARNING - No preprocessor field found in cfg.\n",
      "2024-02-21 13:51:15,742 - modelscope - WARNING - No val key and type key found in preprocessor domain of configuration.json file.\n",
      "2024-02-21 13:51:15,742 - modelscope - WARNING - Cannot find available config to build preprocessor at mode inference, current config: {'model_dir': '/mnt/workspace/.cache/modelscope/damo/nlp_gte_sentence-embedding_chinese-base'}. trying to build by task and model information.\n",
      "2024-02-21 13:51:15,762 - modelscope - WARNING - No preprocessor field found in cfg.\n",
      "2024-02-21 13:51:15,762 - modelscope - WARNING - No val key and type key found in preprocessor domain of configuration.json file.\n",
      "2024-02-21 13:51:15,763 - modelscope - WARNING - Cannot find available config to build preprocessor at mode inference, current config: {'model_dir': '/mnt/workspace/.cache/modelscope/damo/nlp_gte_sentence-embedding_chinese-base', 'sequence_length': 128}. trying to build by task and model information.\n",
      "/tmp/ipykernel_442/427817804.py:2: DeprecationWarning: Call to deprecated class method from_defaults. (ServiceContext is deprecated, please use `llama_index.settings.Settings` instead.) -- Deprecated since version 0.10.0.\n",
      "  service_context = ServiceContext.from_defaults(embed_model=embeddings, llm=llm)\n",
      "/opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:993: FutureWarning: The `device` argument is deprecated and will be removed in v5 of Transformers.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "embeddings = ModelScopeEmbeddings4LlamaIndex(model_id=embedding_model)\n",
    "service_context = ServiceContext.from_defaults(embed_model=embeddings, llm=llm)\n",
    "set_global_service_context(service_context)\n",
    "\n",
    "index = VectorStoreIndex.from_documents(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "df218d21-9ad1-42f3-b44c-47aa56f6edcf",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-21T05:51:20.557315Z",
     "iopub.status.busy": "2024-02-21T05:51:20.556991Z",
     "iopub.status.idle": "2024-02-21T05:51:20.610136Z",
     "shell.execute_reply": "2024-02-21T05:51:20.609707Z",
     "shell.execute_reply.started": "2024-02-21T05:51:20.557297Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# set Logging to DEBUG for more detailed outputs\n",
    "query_engine = index.as_query_engine()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "10c8c01f-c923-4234-a93e-c37a39358f5b",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2024-02-21T05:59:18.934204Z",
     "iopub.status.busy": "2024-02-21T05:59:18.933908Z",
     "iopub.status.idle": "2024-02-21T05:59:19.777534Z",
     "shell.execute_reply": "2024-02-21T05:59:19.777054Z",
     "shell.execute_reply.started": "2024-02-21T05:59:18.934187Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2000年国务院决定将西安交通大学、西安医科大学、陕西财经学院三校合并，组建新的西安交通大学\n"
     ]
    }
   ],
   "source": [
    "response = query_engine.query(\"西安交大是由哪几个学校合并的?\")\n",
    "print(response)\n",
    "#display(Markdown(f\"<b>{response}</b>\"))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
